QuantBio Single-cell RNA-Seq Exploratory analysis

Jen Modliszewski, Ph.D | QuantBio, LLC

05 April 2022

Purpose

This script is intended to be run as an initial first pass on unfiltered datasets. Users may want to inspect sample distributions and/or run the script interactively to get a feel for the data.

Setup

Parameters

Parameter Value
parallel multicore
seurat_raw ./../processedData/Su_7142_210727B6_raw.rds
organism mouse
sample_id SampleID
decontamX_detect TRUE
decontamX_filter TRUE
decontamX_thresh 0.6
doublet_detect TRUE
doublet_filter TRUE
filter hard
nFeature.ll 500
nFeature.ul 5000
percent.mt.ul 10
nCount.ll 2000
nCount.ul 40000
transform standard
var_regress none
cell_cycle TRUE
cluster_algorithm Louvain2
cluster_resolution c(0.15, 0.2, 0.5, 0.8, 1.1, 1.4)
anno_db c(“ImmGen”, “hpca”)
pref_ann ImmGen
biomart_rds ./../processedData/h2m_biomart.rds
ann_collapse_n 200
downsample FALSE
downsample_cells 20000
exp_metadata ./../processedData/scRNAseq_metadata_clean.csv
seurat_processed ./results/QB_SingleCell_qQC/seurat_05Apr2022_0418UTC_10k.rds
num_var_feature 10000
# Determining if doublet detecting and filtering will occur
if (isTRUE(params$doublet_filter)){
  dbl <- params$doublet_detect
  dbl_dtct=TRUE
} else {
  if (isFALSE(params$doublet_detect)) {
    dbl_dtct=FALSE
  } else {dbl_dtct=TRUE}
}

if (isTRUE(params$decontamX_filter)){
  dcx_dtct=TRUE
} else {
  if (isFALSE(params$decontamX_detect)) {
    dcx_dtct=FALSE
  } else {dcx_dtct=TRUE}
}


if (params$var_regress=="none"){
  var_regress=NULL
} else {
  var_regress <- params$var_regress
}

if ("cc" %in% params$var_regress) {
  cc_regress=TRUE
  var_regress=NULL
} else {
  cc_regress=FALSE
}

# This feature not yet supported.
if (!isFALSE(params$exp_metadata)) {
  cov_analysis <- TRUE
} else {
  cov_analysis <- FALSE
 }

if (params$parallel=="multicore") {
  bpparam <- MulticoreParam()
} else  {
  bpparam <- SerialParam()
}

Output Files


  • The final seurat file will be saved to: ./results/QB_SingleCell_qQC/seurat_05Apr2022_0418UTC_10k.rds

Load data

seurat_raw <- readRDS(params$seurat_raw)

Plot setup

theme_sara <- function(){
  theme_bw(base_size=10)+
    theme(axis.text=element_text(color="black"),
          panel.background=element_rect(color="white"),
          strip.text = element_text(size=12),
          strip.background = element_rect(fill="white"))
}

theme_sara_90 <- function() {
  theme_bw(base_size=18)+
    theme(axis.text.x=element_text(angle=90,hjust = 1,vjust = 0.5),
          axis.text=element_text(color="black"),
          panel.background=element_rect(color="black"),
          strip.text = element_text(size=12),
          strip.background = element_rect(fill="white"))
}
sid <- params$sample_id
seurat_raw@meta.data$SampleID <- seurat_raw@meta.data[[sid]]

s.cols <- c(rcartocolor::carto_pal(n=4, "Teal"), 
            rcartocolor::carto_pal(n=4, "Emrld"), 
            rcartocolor::carto_pal(n=4, "BurgYl"), 
            rcartocolor::carto_pal(n=4, "PinkYl"), 
            rcartocolor::carto_pal(n=4, "Purp"))

num_samples <- length(levels(as.factor(seurat_raw@meta.data$SampleID)))
if (num_samples <= 20) {
  sample.cols <- sample(s.cols, size=num_samples, replace=FALSE)
} else {
  sample.cols <- colorRampPalette(s.cols)(num_samples)
}

names(sample.cols) <- levels(as.factor(seurat_raw@meta.data$SampleID))


heatmap.colfun <- microViz::heat_palette(
                  palette = "Lisbon",
                  breaks = 100,
                  range = c(0,1),
                  sym = FALSE,
                  rev = FALSE)

heatmap.colfun <- microViz::heat_palette(
                  palette = "Lisbon",
                  breaks = 100,
                  range = c(0,1),
                  sym = FALSE,
                  rev = FALSE)

dbl.cols <- c(`eSinglet`="snow2", `eDoublet`="#EE1289")
dcx.cols <- c(`Native`="azure2", `Ambient`="chartreuse1")
unikn_blue_red.cols <- c(rev(pal_karpfenblau[[5]]), "white", pal_bordeaux[[5]])


ref.cols <- list()

ref.cols[["SingleR_ImmGen"]]  <- c(`B cells` = "#F0A0FF", 
                                   `Basophils` = "#9370DB",
                                    `Eosinophils`= "#B452CD",
                                   `DC` = "#9DCC00", 
                                   `Epithelial cells` = "#87CEFA",  
                                   `Endothelial cells` = "#6495ED",
                                   `Fibroblasts` = "navy",
                                   `ILC` = "#191919", 
                                   `Macrophages` = "#1C8356", 
                                   `Mast cells` = "#E9Debb", 
                                   `Monocytes` = "#81C57A", 
                                   `Microglia` = "#EEB4B4",
                                   `Neutrophils` = "#b10da1",
                                   `NK cells` = "#8F7C00", 
                                   `NKT` = "#FEAF16", 
                                   `Others` = "gray40", 
                                   `Stem cells` = "red", 
                                   `Stromal cells` = "#F08080",
                                   `T cells` = "#FE902EFF", 
                                   `Tgd` = "#FFEE33")

ref.cols[["SingleR_HPCA"]] <- c(`Astocyte`="grey",
                                `B_cell`="#F0A0FF",
                                `BM`="chocolate4",
                                `BM & Prog.`="bisque3",
                                `CMP`="#DC8F71",
                                 `DC`="#9DCC00",  
                                `Endothelial_cells`="#6495ED",
                                `Epithelial_cells`="#87CEFA",
                                `Erythroblast`="red4",
                                `Fibroblasts`="navy",
                                `Gametocytes`="mediumpurple1",
                                `HSC`="red",
                                `Macrophage`="#1C8356",
                                `Monocyte`="#81C57A",
                                `Myelocyte`="green", 
                                `Neutrophils`="#b10da1",
                                `NK_cell`="#8F7C00",
                                `Osteoblasts`="cyan3",
                                `Platelets`="#A5DAC8",
                                `T_cells`="#FE902EFF",
                                `Others` = "gray40")
ref.cols[["SingleR_hpca"]] <- ref.cols[["SingleR_HPCA"]]                                

QC

Calculate Percent Mitochondial genes to be used in downstream filtering

# the prefixes below should work for human or mouse.
seurat_raw <- PercentageFeatureSet(seurat_raw, pattern = "^MT-|^mt-", col.name = "percent.mt")

Create 20k cell subsetted seurat for multimode test. The raw data will be downsampled after filtering.

seurat_raw_20k <- subset(seurat_raw, downsample=20000)

Distributions

Look for samples with skewed, abnormal or bimodal distributions

p1 <- RidgePlot(seurat_raw, group.by="SampleID", 
                features="nFeature_RNA", 
                same.y.lims=TRUE, sort=TRUE, log=TRUE)
p1 <- p1 + scale_fill_manual(values=sample.cols)
p1 <- p1 + scale_x_continuous(labels = scales::scientific)
p1 <- p1 + theme(axis.text=element_text(size=11))

p2 <- RidgePlot(seurat_raw, group.by="SampleID", 
                features="nCount_RNA", 
                same.y.lims=TRUE, sort=TRUE, log=TRUE)
p2 <- p2 + scale_fill_manual(values=sample.cols)
p2 <- p2 + scale_x_continuous(labels = scales::scientific, n.breaks=4)
p2 <- p2 + theme(axis.text=element_text(size=11))

mt.ul <- max(seurat_raw@meta.data$percent.mt)+5
p3 <- RidgePlot(seurat_raw, group.by="SampleID",
                features="percent.mt", y.max=50, sort=TRUE)
p3 <- p3 + scale_fill_manual(values=sample.cols)
p3 <- p3 + xlim(0, mt.ul)
p3 <- p3 + theme(axis.text=element_text(size=11))

cowplot::plot_grid(p1 + theme(legend.position="none", axis.title.y=element_blank()),
                           p2 + theme(legend.position="none", axis.title.y=element_blank()),
                           p3 + theme(legend.position="none", axis.title.y=element_blank()),
                           align = 'vh',
                           labels = c("A", "B", "C"),
                           hjust = -1,
                           nrow = 1)

Modality test

features <- c("nFeature_RNA", "nCount_RNA", "percent.mt")
names(features) <- features
modetest.list <- BiocParallel::bplapply(features, 
                   function(f) multimode::modetest(seurat_raw_20k@meta.data[[f]]),
                   BPPARAM=MulticoreParam())

pvalues_logical.list <- lapply(modetest.list, function(mt) mt[["p.value"]] <= 0.05)
                   
lapply(modetest.list, function(t) t)
## $nFeature_RNA
## 
##  Ameijeiras-Alonso et al. (2019) excess mass test
## 
## data:  seurat_raw_20k@meta.data[[f]]
## Excess mass = 0.034961, p-value < 2.2e-16
## alternative hypothesis: true number of modes is greater than 1
## 
## 
## $nCount_RNA
## 
##  Ameijeiras-Alonso et al. (2019) excess mass test
## 
## data:  seurat_raw_20k@meta.data[[f]]
## Excess mass = 0.0086293, p-value < 2.2e-16
## alternative hypothesis: true number of modes is greater than 1
## 
## 
## $percent.mt
## 
##  Ameijeiras-Alonso et al. (2019) excess mass test
## 
## data:  seurat_raw_20k@meta.data[[f]]
## Excess mass = 0.002569, p-value = 0.382
## alternative hypothesis: true number of modes is greater than 1
features.test <- features[unlist(pvalues_logical.list)]

modes.list <- BiocParallel::bplapply(features.test, 
                     function(f) multimode::locmodes(seurat_raw@meta.data[[f]],
                                                     mod0=2,
                                                     display=FALSE),
                     BPPARAM=bpparam)

antimode.list <- lapply(modes.list, 
                        function(m) round(m$locations[[2]], digits=2))

Inspect the distributions - if the antimode falls in the valley, it is potentially a good cutoff.

p1 <- ggplot(data=seurat_raw@meta.data, aes(x=nFeature_RNA)) 
p1 <- p1 + geom_density(fill="slateblue4") + labs(x="Number of genes")
if (isTRUE(pvalues_logical.list[["nFeature_RNA"]])) {
  p1 <- p1 + geom_vline(xintercept=antimode.list[["nFeature_RNA"]])
  p1 <- p1 + labs(caption=paste("antimode =", antimode.list[["nFeature_RNA"]]))
} else {
  p1 <- p1 + labs(caption="No antimode to display")
}
p1 <- p1 + theme(plot.caption=element_text(face="italic"))

p2 <- ggplot(data=seurat_raw@meta.data, aes(x=nCount_RNA))
p2 <- p2 + geom_density(fill="magenta4") + labs(x="Number of UMIs") + theme(axis.title.y=element_blank())
if (isTRUE(pvalues_logical.list[["nCount_RNA"]])) {
  p2 <- p2 + geom_vline(xintercept=antimode.list[["nCount_RNA"]])
  p2 <- p2 + labs(caption=paste("antimode =", antimode.list[["nCount_RNA"]]))
} else {
  p2 <- p2 + labs(caption="No antimode to display")
}
p2 <- p2 + theme(plot.caption=element_text(face="italic"))

mt.ul <- max(seurat_raw@meta.data$percent.mt)+2
p3 <- ggplot(data=seurat_raw@meta.data, aes(x=percent.mt))
p3 <- p3 + geom_density(fill="olivedrab3") + labs(x="% mitochondrial genes") + theme(axis.title.y=element_blank())
if (isTRUE(pvalues_logical.list[["percent.mt"]])) {
  p3 <- p3 + geom_vline(xintercept=antimode.list[["percent.mt"]])
  p3 <- p3 + labs(caption=paste("antimode =", antimode.list[["percent.mt"]]))
} else {
  p3 <- p3 + labs(caption="No antimode to display")
}
p3 <- p3 + theme(plot.caption=element_text(face="italic"))
p3 <- p3 +xlim(0, mt.ul)

cowplot::plot_grid(p1,
                   p2,
                   p3,
                   align = 'vh',
                   labels = c("A", "B", "C"),
                   hjust = -1,
                   nrow = 1)

Stats

Calculate median, median absolute deviation (mad), and outliers. Look for samples that are outliers.

median.list <- lapply(features, 
                       function(f) median(seurat_raw@meta.data[[f]], 
                                          na.rm=TRUE))

mad.list <- lapply(features,
                   function(f) mad(seurat_raw@meta.data[[f]],
                                      na.rm=TRUE))

outliers.list <- lapply(features,
                        function(f) scater::isOutlier(seurat_raw@meta.data[[f]], 
                                                      type="both"))
outliers.list <- lapply(outliers.list, function(o) attr(o, "thresholds"))
sample.median.df <- seurat_raw@meta.data %>%
                       group_by(SampleID) %>%
                       summarize(nFeature_RNA_median=median(nFeature_RNA, na.rm=TRUE),
                                 nCount_RNA_median=median(nCount_RNA, na.rm=TRUE),
                                 percent.mt_median=median(percent.mt, na.rm=TRUE))  

sample.median.df$nFeature_RNA_outlr <- ifelse(sample.median.df$nFeature_RNA_median <= median.list[["nFeature_RNA"]] - 3*mad.list[["nFeature_RNA"]] || sample.median.df$nFeature_RNA_median >= median.list[["nFeature_RNA"]] + 3*mad.list[["nFeature_RNA"]],
                                               "FLAG",
                                               "OK")



sample.median.df$nCount_RNA_outlr <- ifelse(sample.median.df$nCount_RNA_median <= median.list[["nCount_RNA"]] - 3*mad.list[["nCount_RNA"]] || sample.median.df$nFeature_RNA_median >= median.list[["nCount_RNA"]] + 3*mad.list[["nCount_RNA"]],
                                               "FLAG",
                                               "OK")

sample.median.df$percent.mt_outlr <- ifelse(sample.median.df$percent.mt_median >= median.list[["percent.mt"]] + 3*mad.list[["percent.mt"]],
                                               "FLAG",
                                               "OK")

kable(sample.median.df, booktabs = TRUE) %>%
  kable_styling(font_size = 12)
SampleID nFeature_RNA_median nCount_RNA_median percent.mt_median nFeature_RNA_outlr nCount_RNA_outlr percent.mt_outlr
770128 3177.0 11656.0 3.204618 OK OK OK
770163 2935.0 11111.5 2.043499 OK OK OK
770174 3149.0 13141.0 2.396746 OK OK OK
770184 1967.0 5575.0 4.338221 OK OK OK
770200 2870.0 9982.0 4.486458 OK OK OK
770203 1510.5 4255.5 2.682422 OK OK OK
770216 2255.0 6919.0 3.052563 OK OK OK
770223 2313.0 7512.0 4.445599 OK OK OK
770224 3054.0 10652.0 4.864111 OK OK OK
770225 1493.0 3557.0 3.825524 OK OK OK
770226 2578.5 8665.0 3.463999 OK OK OK
770227 2474.0 8087.5 3.443354 OK OK OK
770231 3185.0 11553.0 3.050032 OK OK OK
770232 3081.0 12380.0 2.700411 OK OK OK
770238 2229.5 6877.0 5.361471 OK OK OK
770240 926.0 1759.5 3.589263 OK OK OK
770264 2945.0 11507.5 2.264529 OK OK OK
770267 947.0 1902.0 2.812986 OK OK OK
770271 2522.5 9251.5 3.257304 OK OK OK
770279 1795.0 5992.0 3.531969 OK OK OK

Set filtering limits

Setting limits based on parameters. Cells will be filtered for doublets and ambient RNA after doublet detection and ambient RNA detection, if performed.

if (params$filter=="multimodal") {
  if (modetest.list[["nFeature_RNA"]][["p.value"]] <= 0.05) {
    nFeature_RNA.ll <- round(antimode.list[["nFeature_RNA"]], digits=0)
  } else {
    nFeature_RNA.ll <- round(median.list[["nFeature_RNA"]]-3*mad.list[["nFeature_RNA"]], digits=0)
  }
  # use hard for other limits
  nFeature_RNA.ul <-  params$nFeature.ul
  nCount_RNA.ul <- params$nCount.ul
  percent.mt.ul <- params$percent.mt.ul
}  
  
if (params$filter=="mad") {
  nFeature_RNA.ll <- round(median.list[["nFeature_RNA"]]-3*mad.list[["nFeature_RNA"]],
                           digits=0)
  nFeature_RNA.ul <-  round(median.list[["nFeature_RNA"]]+3*mad.list[["nFeature_RNA"]],
                            digits=0)
  percent.mt.ul <- round(median.list[["percent.mt"]]+3*mad.list[["percent.mt"]],
                         digits=0)
  nCount_RNA.ul <-  round(median.list[["nCount_RNA"]]+3*mad.list[["nCount_RNA"]],
                          digits=0)
}

if (params$filter=="hard") {
  nFeature_RNA.ll <- params$nFeature.ll
  nFeature_RNA.ul <- params$nFeature.ul
  nCount_RNA.ul <- params$nCount.ul
  percent.mt.ul <- params$percent.mt.ul
}

# nCount stays the same regardless
nCount_RNA.ll <- params$nCount.ll

Below are the filtering thresholds


  • Median nFeature 2130
  • MAD nFeature 1813.2198
  • Median nCount_RNA 6413
  • MAD nCount_RNA 7458.9606
  • Median percent.mt 3.3044197
  • MAD percent.mt 2.1523486

  • Lower limit for number of genes 500
  • Upper limit for number of genes 5000
  • Lower limit for number of UMIs 2000
  • Upper limit for number of UMIs 40000
  • Upper limit for percentage mt 10

Doublet detection

Prior to doublet detection a minimum amount of filtering is performed. If cells will be downsampled, a subset of cells are utilized for doublet detection. Otherwise, doublet detection is performed on entire data set. Doublets are identified from each sample individually.

Currently, doublet detection is performed via the R package scds. The hybrid score utilized here incorporates two doublet scores: - bcds - artificial doublets are created and a doublet classifier is trained. Cells are assigned a probability of falling into the artificial doublet class. - cxds - expected co-expression of gene-pairs based on frequency is utilized to assign a rank-order based score to all gene-pairs. Scores for co-occuring gene-pairs for each cell are summed. The hybrid score is derived by summing the normalized bcds and cxds scores.

# Seurat was the only package that provided workable code for conversion between sce and seurat objects
seurat_dbl <- seurat_raw
seurat_dbl@meta.data$rmv <- ifelse(seurat_dbl@meta.data$nFeature_RNA < 200, "remove", "Keep")
seurat_dbl@meta.data$rmv <- ifelse(seurat_dbl@meta.data$percent.mt >= 20, "remove", seurat_dbl@meta.data$rmv)
seurat_dbl <- subset(seurat_dbl, subset=`rmv`=="Keep")
if (isTRUE(params$downsample)) {
  ds=params$downsample_cells*2
  seurat_dbl <- subset(seurat_dbl, downsample=ds)
}
samples <- seurat_dbl@meta.data$SampleID
seurat.list <- SplitObject(seurat_dbl, split.by="SampleID")
sce.list <- lapply(seurat.list, 
                   function(s) Seurat::as.SingleCellExperiment(s))
sce.list <- lapply(sce.list, 
                   function(sce) scds::cxds_bcds_hybrid(sce))
## [04:22:47] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:23:36] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:24:30] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:25:21] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:26:06] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:26:41] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:27:20] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:28:02] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:28:51] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:30:02] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:30:57] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:31:45] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:32:42] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:33:14] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:33:26] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:34:09] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:35:05] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:37:08] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:38:27] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
## [04:39:20] WARNING: amalgamation/../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
seurat.list <- lapply(sce.list,
                      function(sce) Seurat::as.Seurat(sce,
                                                      counts = "counts"))
num_seurat_obj <- length(seurat.list)
seurat_raw <- merge(x=seurat.list[[1]], y=seurat.list[2:num_seurat_obj])

seurat_raw@meta.data$hybrid_call <- ifelse(seurat_raw@meta.data$hybrid_score >= 1, "eDoublet", "eSinglet")
p <- ggplot(data=seurat_raw@meta.data, aes(x=hybrid_score))
p <- p + geom_density(fill=dbl.cols[["eDoublet"]], color="#AA005FFF")
p <- p + theme_sara() + labs(x="scds hybrid score")
p

Sample plots

Boxplots

A look at the distribution of doublet scores

dat <- as.data.frame(seurat_raw@meta.data)
dbl_data.df <- data.frame(SampleID=dat$SampleID,
                          hybrid_score=dat$hybrid_score,
                          hybrid_call=dat$hybrid_call)

median.df <- dbl_data.df %>%
              group_by(SampleID) %>%
              summarize(median_h=median(hybrid_score, na.rm=TRUE))
median.dt <- setDT(median.df)

# Get x-axis orders
setorder(median.dt, median_h)
h.ord <- median.dt$SampleID

p1 <- ggplot(data=dbl_data.df, aes(y=hybrid_score, x=SampleID, fill=SampleID))
p1 <- p1 + geom_boxplot() + labs(y="scds hybrid score")
p1 <- p1 + theme_sara() + scale_x_discrete(limits=h.ord)
p1 <- p1 + theme(axis.text.x=element_text(angle=90),
                 axis.title.x=element_blank(),
                 legend.position="none")
p1 <- p1 + scale_fill_manual(values=sample.cols)
p1

Hybrid call

dbl_data.df$hybrid_call <- factor(dbl_data.df$hybrid_call, levels=c("eSinglet", "eDoublet"))
p <-  ggplot(data=dbl_data.df, aes(x=SampleID, fill=hybrid_call))
p <- p + geom_bar(position="fill", color="black") 
p <- p + scale_fill_manual(values=dbl.cols, name="Hybrid call") 
p <- p + theme_sara()
p <- p + theme(axis.title.x = element_blank(),
               axis.text.x = element_text(angle=90))
p

Density plot

p1 <- ggplot(data=dbl_data.df, aes(x=hybrid_score, y=SampleID, fill=SampleID))
p1 <- p1 + ggridges::geom_density_ridges()
p1 <- p1 + theme_sara() + labs(x="scds hybrid score")
p1 <- p1 + theme(axis.title.y=element_blank(),
                 legend.position="none")
p1 <- p1 + scale_fill_manual(values=sample.cols)
p1

Decontamination

This function is recently incorporated and not completely recommended unless you know what you are doing. The DecontX package is utilized for contaminant detection. A threshold of 0.5 is recommended. DecontX can be run with all samples, with each sample as a batch. If trouble with decontamination continues and raw counts are available, consider using those as background.

sce <- as.SingleCellExperiment(seurat_raw)
sce <- celda::decontX(sce, batch=sce$SampleID)

# If you do not reset these, bad things will happen. 
sce$nCount_RNA <- NULL
sce$nFeature_RNA <- NULL

if (isTRUE(params$decontamX_filter)) {
  r <- round(decontXcounts(sce))
  seurat_raw <- CreateSeuratObject(counts=r,
                                 meta.data=as.data.frame(colData(sce)))
} else {
  seurat_raw <- CreateSeuratObject(counts=counts(sce),
                                 meta.data=as.data.frame(colData(sce)))
}

seurat_raw@meta.data$decontX_contamination <- sce$decontX_contamination
seurat_raw@meta.data$contam_status <- ifelse(seurat_raw@meta.data$decontX_contamination >= params$decontamX_thresh, 
                                           "Ambient", 
                                           "Native")
p <- ggplot(data=seurat_raw@meta.data, aes(x=decontX_contamination))
p <- p + geom_density(fill=dcx.cols[["Ambient"]], color="#62C707FF")
p <- p + theme_sara() + labs(x="Percent contamination")
p

Sample plots

Boxplots

A look at the distribution of decontamination scores. The score ranges from 0 to 1 and refers to the percentage of cells that are contaminated.

dat <- as.data.frame(seurat_raw@meta.data)

median.df <- dat %>%
              group_by(SampleID) %>%
              summarize(median_X=median(decontX_contamination, na.rm=TRUE))
median.dt <- setDT(median.df)

# Get x-axis orders
setorder(median.dt, median_X)
X.ord <- median.dt$SampleID

p1 <- ggplot(data=dat, aes(y=decontX_contamination, x=SampleID, fill=SampleID))
p1 <- p1 + geom_boxplot()
p1 <- p1 + labs(y="Percent contamination")
p1 <- p1 + theme_sara() + scale_x_discrete(limits=X.ord)
p1 <- p1 + theme(axis.text.x=element_text(angle=90),
                 axis.title.x=element_blank(),
                 legend.position="none")
p1 <- p1 + scale_fill_manual(values=sample.cols)
p1

Contaminated threshold

dat$contam_status <- factor(dat$contam_status, levels=c("Native", "Ambient"))
p <-  ggplot(data=dat, aes(x=SampleID, fill=contam_status))
p <- p + labs(y="Percent contamination")
p <- p + geom_bar(position="fill", color="black") + scale_x_discrete(limits=X.ord)
p <- p + scale_fill_manual(values=dcx.cols, name="Contamination status") 
p <- p + theme_sara()
p <- p + theme(axis.title.x = element_blank(),
               axis.text.x=element_text(angle=90))
p

Density plot

p1 <- ggplot(data=dat, aes(x=decontX_contamination, y=SampleID, fill=SampleID))
p1 <- p1 + ggridges::geom_density_ridges()
p1 <- p1 + labs(x="Percent contamination")
p1 <- p1 + theme_sara()
p1 <- p1 + theme(axis.title.y=element_blank(),
                 legend.position="none")
p1 <- p1 + scale_fill_manual(values=sample.cols)
p1

Filtered cells

If doublet detection was performed (and data will be downsampled), the cells are already downsampled to 80k and filtered to exclude percent mitochondrial genes above 20% and number of genes below 200.

seurat_raw@meta.data$Filter <- ifelse(seurat_raw@meta.data$nFeature_RNA < nFeature_RNA.ll | seurat_raw@meta.data$nFeature_RNA > nFeature_RNA.ul,
                                      "Remove",
                                      "Keep")
# Percent mt
seurat_raw@meta.data$Filter <- ifelse(seurat_raw@meta.data$percent.mt > percent.mt.ul,
                                      "Remove",
                                      seurat_raw@meta.data$Filter)


#nCount_RNA
seurat_raw@meta.data$Filter <- ifelse(seurat_raw@meta.data$nCount_RNA < nCount_RNA.ll | seurat_raw@meta.data$nCount_RNA > nCount_RNA.ul, 
                                      "Remove", 
                                      seurat_raw@meta.data$Filter)


if (isTRUE(params$doublet_filter)) { 
    seurat_raw@meta.data$Filter <-  ifelse(seurat_raw@meta.data$hybrid_call=="eDoublet",
                                           "Remove", 
                                            seurat_raw@meta.data$Filter)
}

if (isTRUE(params$decontamX_filter)) { 
     seurat_raw@meta.data$Filter <- ifelse(seurat_raw@meta.data$contam_status=="Ambient",
                                           "Remove", 
                                            seurat_raw@meta.data$Filter)
}


summary(as.factor(seurat_raw@meta.data$Filter))
##   Keep Remove 
##  96281  83676

All metrics

keep_remove.cols <- c(`Keep`="lavenderblush3",
                      `Remove`="#DC143C")

p1 <- ggplot(data=seurat_raw@meta.data, aes(x=nCount_RNA, y=nFeature_RNA, color=Filter))
p1 <- p1 + geom_point() + geom_hline(yintercept=nFeature_RNA.ll) + geom_hline(yintercept=nFeature_RNA.ul) + geom_vline(xintercept=nCount_RNA.ul)
p1 <- p1 + theme_sara()
p1 <- p1 + scale_color_manual(values=keep_remove.cols)

p2 <- ggplot(data=seurat_raw@meta.data, aes(x=percent.mt, y=nFeature_RNA, color=Filter))
p2 <- p2 + geom_point() + geom_hline(yintercept=nFeature_RNA.ll) + geom_hline(yintercept=nFeature_RNA.ul) + geom_vline(xintercept = percent.mt.ul)
p2 <- p2 + theme_sara()
p2 <- p2 + scale_color_manual(values=keep_remove.cols)

p3 <- ggplot(data=seurat_raw@meta.data, aes(x=percent.mt, y=nCount_RNA, color=Filter))
p3 <- p3 + geom_point() + geom_vline(xintercept = percent.mt.ul) + geom_hline(yintercept=nCount_RNA.ul)
p3 <- p3 + geom_hline(yintercept=nCount_RNA.ll)
p3 <- p3 + theme_sara()
p3 <- p3 + scale_color_manual(values=keep_remove.cols)

legend <- cowplot::get_legend(p1 + theme(legend.box.margin = margin(0, 0, 0, 12)))

prow <- cowplot::plot_grid(p1 + theme(legend.position="none"),
                           p2 + theme(legend.position="none"),
                           p3 + theme(legend.position="none"),
                           align = 'vh',
                           labels = c("A", "B", "C"),
                           hjust = -1,
                           nrow = 1)
cowplot::plot_grid(prow, legend, rel_widths = c(4, .6))

Idents(seurat_raw) <- seurat_raw@meta.data$orig.ident
seurat <- subset(seurat_raw, subset=Filter=="Keep")

Check filtering

It never hurts.

Scatterplots

p1 <- ggplot(data=seurat@meta.data, aes(x=nCount_RNA, y=nFeature_RNA, color=Filter))
p1 <- p1 + geom_point() 
p1 <- p1 + geom_hline(yintercept=nFeature_RNA.ll) + geom_hline(yintercept=nFeature_RNA.ul)
p1 <- p1 + geom_vline(xintercept=nCount_RNA.ll) + geom_vline(xintercept=nCount_RNA.ul)
p1 <- p1 + theme_sara()
p1 <- p1 + scale_color_manual(values=keep_remove.cols)

p2 <- ggplot(data=seurat@meta.data, aes(x=percent.mt, y=nFeature_RNA, color=Filter))
p2 <- p2 + geom_point() + geom_hline(yintercept=nFeature_RNA.ll) + geom_hline(yintercept=nFeature_RNA.ul) 
p2 <- p2 + geom_vline(xintercept = percent.mt.ul)
p2 <- p2 + theme_sara()
p2 <- p2 + scale_color_manual(values=keep_remove.cols)

p3 <- ggplot(data=seurat@meta.data, aes(x=percent.mt, y=nCount_RNA, color=Filter))
p3 <- p3 + geom_point() + geom_vline(xintercept = percent.mt.ul)
p3 <- p3 + geom_hline(yintercept=nCount_RNA.ul) + geom_hline(yintercept=nCount_RNA.ll)
p3 <- p3 + theme_sara()
p3 <- p3 + scale_color_manual(values=keep_remove.cols)

legend <- cowplot::get_legend(p1 + theme(legend.box.margin = margin(0, 0, 0, 12)))

prow <- cowplot::plot_grid(p1 + theme(legend.position="none"),
                           p2 + theme(legend.position="none"),
                           p3 + theme(legend.position="none"),
                           align = 'vh',
                           labels = c("A", "B", "C"),
                           hjust = -1,
                           nrow = 1)
cowplot::plot_grid(prow, legend, rel_widths = c(4, .6))

Density plots

p1 <- ggplot(data=seurat@meta.data, aes(x=nFeature_RNA)) 
p1 <- p1 + geom_density(fill="slateblue4") + labs(x="Number of genes")

p2 <- ggplot(data=seurat@meta.data, aes(x=nCount_RNA))
p2 <- p2 + geom_density(fill="magenta4") + labs(x="Number of UMIs") + theme(axis.title.y=element_blank())

p3 <- ggplot(data=seurat@meta.data, aes(x=percent.mt)) 
p3 <- p3 + geom_density(fill="olivedrab3") + labs(x="% mitochondrial genes") + theme(axis.title.y=element_blank())

prow <- cowplot::plot_grid(p1,
                           p2,
                           p3,
                           align = 'vh',
                           labels = c("A", "B", "C"),
                           hjust = -1,
                           nrow = 1)
cowplot::plot_grid(prow, rel_widths = c(4, .4))

Inspect samples after filtering

Look for outlier samples to make note of.

meta.df <- as.data.table(seurat@meta.data)
summary.df <- meta.df %>% 
              dplyr::group_by(SampleID) %>% 
              dplyr::summarise(n=n(),
                               median_nFeature=median(nFeature_RNA, na.rm=TRUE),
                               medan_nCount=median(nCount_RNA, na.rm=TRUE),
                               median_percentMT=median(percent.mt, na.rm=TRUE))
setDT(summary.df)
summary.melt.df <- data.table::melt(summary.df, id.vars="SampleID")
setorder(summary.df, n)
x.ord <- summary.df$SampleID

p1 <- ggplot(data=summary.df, aes(x=SampleID, y=n, fill=SampleID))
p1 <- p1 + geom_bar(stat="identity", color="black")
p1 <- p1 + scale_fill_manual(values=sample.cols)
p1 <- p1 + theme_sara_90() + theme(axis.text=element_text(size=9), axis.title.x = element_blank(), axis.title.y=element_text(size=11))
p1 <- p1 + scale_x_discrete(limits=x.ord)
p1 <- p1 + labs(y="Number of cells")

p2 <- ggplot(data=meta.df, aes(x=SampleID, y=nFeature_RNA, fill=SampleID))
p2 <- p2 + geom_boxplot()
p2 <- p2 + scale_fill_manual(values=sample.cols)
p2 <- p2 + theme_sara_90() + theme(axis.text=element_text(size=9), axis.title.x = element_blank(), axis.title.y=element_text(size=11))
p2 <- p2 + scale_x_discrete(limits=x.ord)
p2 <- p2 + labs(y="Number of Genes (nFeature_RNA)")

p3 <- ggplot(data=meta.df, aes(x=SampleID, y=nCount_RNA, fill=SampleID))
p3 <- p3 + geom_boxplot()
p3 <- p3 + scale_fill_manual(values=sample.cols)
p3 <- p3 + theme_sara_90() + theme(axis.text=element_text(size=9), axis.title.x = element_blank(), axis.title.y=element_text(size=11))
p3 <- p3 + scale_x_discrete(limits=x.ord)
p3 <- p3 + labs(y="Number of UMIs (nCount_RNA)")

p4 <- ggplot(data=meta.df, aes(x=SampleID, y=percent.mt, fill=SampleID))
p4 <- p4 + geom_boxplot()
p4 <- p4 + scale_fill_manual(values=sample.cols)
p4 <- p4 + theme_sara_90() + theme(axis.text=element_text(size=9), 
                                   axis.title.x = element_blank(), axis.title.y=element_text(size=11))
p4 <- p4 + scale_x_discrete(limits=x.ord)
p4 <- p4 + labs(y="Percentage MT")

#legend <- cowplot::get_legend(p2 + theme(legend.box.margin = margin(0, 0, 0, 12)))

cowplot::plot_grid(p1 + theme(legend.position="none"),
                   p2 + theme(legend.position="none"),
                   p3 + theme(legend.position="none"),
                   p4 + theme(legend.position="none"),
                   align = 'vh',
                   labels = c("A", "B", "C", "D"),
                   hjust = -1,
                   nrow = 2)

Normalize and scale data

Only standard transformation is recommended at this time, due to statistical suitability and computational efficiency. SCTransformation is possible but has a very long run time.

Idents(seurat) <- seurat@meta.data$orig.ident
seurat <- subset(seurat, downsample=params$downsample_cells)
num_cells <- ncol(seurat)
num_genes <- nrow(seurat)

The seurat dataset used for this analysis has:
- 96281 cells
- 32285 genes


if (params$transform=="standard") {
  std_prefix <- "RNA_snn_res."
  seurat <- NormalizeData(seurat, 
                        normalization.method = "LogNormalize", 
                        scale.factor = 10000)
  
   seurat <- FindVariableFeatures(object=seurat, 
                               selection.method = "vst", 
                               nfeatures = params$num_var_feature)

  seurat <- ScaleData(object = seurat,
                        vars.to.regress=var_regress,
                        verbose = TRUE)

} else if (params$transform=="sct_v2") {
  # SCTransform must be run on individual samples
  std_prefix <- "SCT_snn_res."
  seurat.list <- SplitObject(seurat_dbl, split.by="SampleID")
  seurat.list <- lapply(seurat.list,
                        function(s) SCTransform(s, 
                                                vars.to.regress=var_regress,
                                                variable.features.n=params$num_var_feature,
                                                vst.flavor="v2",
                                                method="glmGamPoi",
                                                min_cells=20))
  num_seurat_obj <- length(seurat.list)
  seurat <- merge(x=seurat.list[[1]], y=seurat.list[2:num_seurat_obj])
} else if (params$transform=="sct_v1") {
  std_prefix <- "SCT_snn_res."
  seurat.list <- SplitObject(seurat_dbl, split.by="SampleID")
  seurat.list <- BiocParallel::bplapply(seurat.list,
                        function(s) SCTransform(s, 
                                                vars.to.regress=var_regress,
                                                variable.features.n=params$num_var_feature),
                        BPPARAM=MulticoreParam())
  num_seurat_obj <- length(seurat.list)
  seurat <- merge(x=seurat.list[[1]], y=seurat.list[2:num_seurat_obj])
}

Cell Cycle score

Cell cycle score is only supported for human and mouse data sets.

if (params$organism=="human") {
 ### [1] Create G2M marker gene list (i.e. genes associated with the G2M phase of the 
### cell cycle using Seurat's built-in cc.genes (cell cycle) genes list
g2m.genes <- cc.genes$g2m.genes
s.genes <- cc.genes$s.genes
g2m <- rownames(seurat)[rownames(seurat) %in% g2m.genes]

} else if (params$organism=="mouse") {
  cc_file <- RCurl::getURL("https://raw.githubusercontent.com/hbc/tinyatlas/master/cell_cycle/Mus_musculus.csv") 
  cell_cycle_genes <- read.csv(text = cc_file)
  
  ah <- AnnotationHub::AnnotationHub()

  # Access the Ensembl database for organism
  ahDb <- AnnotationHub::query(ah, 
              pattern = c("Mus musculus", "EnsDb"), 
              ignore.case = TRUE)

  # Acquire the latest annotation files
  id <- ahDb %>%
          mcols() %>%
          rownames() %>%
          tail(n = 1)

  # Download the appropriate Ensembldb database
  edb <- ah[[id]]

  # Extract gene-level information from database
  annotations <- genes(edb, 
                       return.type = "data.frame")

  # Select annotations of interest
  annotations <- annotations %>%
                  dplyr::select(gene_id, gene_name, seq_name, gene_biotype, description)


  # Get gene names for Ensembl IDs for each gene
  cell_cycle_markers <- dplyr::left_join(cell_cycle_genes, annotations, by = c("geneID" = "gene_id"))

  # Acquire the S phase genes
  s.genes <- cell_cycle_markers %>%
              dplyr::filter(phase == "S") %>%
              dplyr::pull("gene_name")
        
  # Acquire the G2M phase genes        
  g2m.genes <- cell_cycle_markers %>%
                dplyr::filter(phase == "G2/M") %>%
                dplyr::pull("gene_name")
}


### [2] Calculate G2M marker module score.
seurat <- CellCycleScoring(seurat, 
                           s.features = s.genes, ### Genes associated with s-phase
                           g2m.features = g2m.genes, ### Genes associated with G2M phase
                           set.ident = TRUE)

Proportion of cells per sample in each phase

Heatmap

tab<-prop.table(table(seurat@meta.data$Phase, seurat@meta.data$SampleID),2)

nr <- length(levels(as.factor(seurat@meta.data$Phase)))
nc <- length(levels(as.factor(seurat@meta.data$SampleID)))

ComplexHeatmap::Heatmap(tab,
                        cluster_columns = TRUE,
                        show_column_dend = TRUE,
                        row_names_gp = grid::gpar(fontsize = 12),
                        column_names_gp = grid::gpar(fontsize=12),
                        cluster_rows = TRUE,
                        show_row_dend = FALSE,
                        col = heatmap.colfun,
                        height = unit(5, "mm")*nr,
                        width = unit(5, "mm")*nc,
                        name="Proportion")

Barplot

cell_cycle.cols <- c("#01617E","#8B9934","#E04883")
names(cell_cycle.cols) <- levels(as.factor(seurat@meta.data$Phase))
p <- ggplot(data=seurat@meta.data, aes(x=SampleID, fill=Phase))
p <- p + geom_bar(position="fill", color="black")
p <- p + theme_sara_90()
p <- p + scale_fill_manual("", values=cell_cycle.cols)
p <- p + labs(y="Proportion")
p <- p + theme(axis.title.x=element_blank(),
               axis.text=element_text(size=9),
               legend.text=element_text(size=9),
               legend.title=element_text(size=9),
               axis.title.y=element_text(size=10),
               strip.text=element_text(size=10),
               legend.position="bottom")
p

Cell cycle regression

If cell cycle was previously identified as having an effect on the clustering and the differences are not biological, the difference between G2M and S phase scores will be calculated and used as the variable to regress. For most cases, it is not recommended to regress out cell cycle. Other variables will also be regressed out at this time.

seurat@meta.data$cc_dif <- seurat@meta.data$S.Score - seurat@meta.data$G2M.Score

var_regress <- gsub("cc", "cc_dif", params$var_regress)

seurat <- ScaleData(object = seurat,
                        vars.to.regress=var_regress,
                        verbose = TRUE)

Annotation

Prepare reference data

The Human Primary Cell Atlas (HPCA, human) and ImmGen (mouse) are both good choices. Mouse data can also be annotated with HPCA. HPCA is written as HPCA when the data is from human or when human reference gene names are converted to mouse gene names via biomart. HPCA is denoted as hpca when the reference data is converted to mouse names by changing to sentence case.

ref.data.list <- list()
if (params$organism=="human") {
  if ("HPCA" %in% params$anno_db) {
    ref.data.list[["HPCA"]] <- celldex::HumanPrimaryCellAtlasData(ensembl=FALSE) 
  }
}
if (params$organism=="mouse") {
  if ("HPCA" %in% params$anno_db) {
      #human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
      #mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl")
      ref.data <- celldex::HumanPrimaryCellAtlasData(ensembl=FALSE)  
      hpa_human.genes <- rownames(assay(ref.data))

      human_to_mouse.df <- readRDS(params$biomart_rds)
      #human_to_mouse.df <- getLDS(attributes=c("external_gene_name"),
      #                            filters="external_gene_name",
      #                            values = hpa_human.genes, 
      #                            mart = human,
      #                            attributesL = c("external_gene_name"), martL = mouse)
    
      assay.data <- as.data.frame(assay(ref.data))
      assay.data$Gene.name <- rownames(assay.data)
      assay.mdata <- merge(assay.data, human_to_mouse.df, by="Gene.name")
      assay.mdata$Gene.name <- NULL
      assay.mdata <- assay.mdata[!duplicated(assay.mdata$Gene.name.1),]
      assay.mdata <- tibble::remove_rownames(assay.mdata)
      assay.mdata <- tibble::column_to_rownames(assay.mdata, "Gene.name.1")

      # Lose about 3k marker genes out of 19k
      dim(assay.mdata)
      dim(assay.data)

      # over-write ref data
      h2m_ref.data <- SummarizedExperiment(assays=SimpleList(logcounts=as.matrix(assay.mdata)),
                                     rowData=rownames(assay.mdata),
                                     colData=colData(ref.data),
                                     metadata=metadata(ref.data))
      ref.data.list[["HPCA"]] <- h2m_ref.data
  }    
  if ("hpca" %in% params$anno_db) {
      ref.data <- celldex::HumanPrimaryCellAtlasData(ensembl=FALSE)  
      hpa_human.genes <- rownames(assay(ref.data))
      
      assay.data <- as.data.frame(assay(ref.data))
      assay.data$Gene.name <- rownames(assay.data)
      assay.data$Gene.name <- stringr::str_to_sentence(assay.data$Gene.name)
      rownames(assay.data) <- assay.data$Gene.name
      assay.data$Gene.name <- NULL
      
      # overwrite reference data
      h2m_ref.data <- SummarizedExperiment(assays=SimpleList(logcounts=as.matrix(assay.data)),
                                           rowData=rownames(assay.data),
                                           colData=colData(ref.data),
                                           metadata=metadata(ref.data))
      ref.data.list[["hpca"]] <- h2m_ref.data
  }
  if ("ImmGen" %in% params$anno_db) {
  ref.data.list[["ImmGen"]] <- celldex::ImmGenData(ensembl=FALSE)  
  }
}

SingleR annotation

The differential expression method of Wilcoxon is used for training the classifier, which often provides a better annotation over the classic method. See links below. Spearman’s correlation is used for classification.


The Single R Book

SingleR marker detection


cells.list <- BiocParallel::bplapply(ref.data.list,
                                     function(ref) SingleR(test=seurat@assays[["RNA"]]@data,
                                                           ref=ref, 
                                                           labels=ref$label.main, 
                                                           fine.tune = TRUE, 
                                                           de.method="wilcox", 
                                                           de.n=30, 
                                                           BPPARAM=bpparam)) 
### Add to seurat
for (ann in names(cells.list)) {
  ann_name <- paste0("SingleR_", ann)
  ann_name_pruned <- paste0(ann_name, "_pruned")
  seurat@meta.data[[ann_name]] <- cells.list[[ann]]$labels
  seurat@meta.data[[ann_name_pruned]] <- cells.list[[ann]]$pruned.labels
}
num_ann <- length(cells.list)
nrow <- round((num_ann+.1)/2, digits=0)
fh <- nrow*6

Assess annotation

Heatmaps

Cells assigned to a particular cell type should have high scores. Here we look at 500 random cells.

rand_cells <- runif(500, min=1, max=500)
plot_shmap <- function(anno_db, cells) {
  p <- plotScoreHeatmap(cells, 
                        cells.use=rand_cells, 
                        silent=TRUE,
                        treeheight_row=0,
                        fontsize=9,
                        main=anno_db)
  return(p[[4]])
}

shmap.plots <- sapply(names(cells.list), 
                      function(a) plot_shmap(anno_db=a,
                                             cells=cells.list[[a]]),
                      simplify=FALSE, USE.NAMES=TRUE)



cowplot::plot_grid(plotlist=shmap.plots,
                   align="vh",
                   labels="AUTO",
                   label_size=12,
                   ncol=2,
                   nrow=nrow)

Deviation from assignment scores (delta)

Some cell types, such as monocytes, macrophages and dendritic cells may be especially difficult to predict.

plot_delta <- function(anno_db, cells) {
  p <- plotDeltaDistribution(cells, 
                      ncol=5,
                      size=0.5) +
                      theme_sara()+ theme(strip.text = element_text(size=9),
                                          legend.position="bottom",
                                          axis.text.x=element_blank())+
                      labs(y="Delta score", x="", title=anno_db)
  return(p)
}

delta.plots <- sapply(names(cells.list), 
                      function(a) plot_delta(anno_db=a,
                                             cells=cells.list[[a]]),
                      simplify=FALSE, USE.NAMES=TRUE)

cowplot::plot_grid(plotlist=delta.plots,
                   align="vh",
                   labels="AUTO",
                   label_size=12,
                   ncol=2,
                   nrow=nrow)

pa <- params$pref_ann
pref_ann <- paste0("SingleR_", pa)

prune <- pruneScores(cells.list[[pa]],
                   nmads = 3,
                   min.diff.med = -Inf,
                   min.diff.next = 0,
                   get.thresholds = FALSE)

prune <- which(prune=="TRUE")
keep <- cells.list[[pa]][-prune,]
seurat <- seurat[,-prune]
SingleR_names <- paste0("SingleR_", names(cells.list))
names(SingleR_names) <- SingleR_names

collapse_other <- function(seurat, ref_label, freq.ll, cells.list) {
  seurat@meta.data[[ref_label]] <- factor(seurat@meta.data[[ref_label]])
  freq<-data.frame(janitor::tabyl(seurat@meta.data[[ref_label]], sort = TRUE))
  colnames(freq)<-c("cells","n","proportion")
  freq$percent<-freq$proportion*100
  freq<-freq[order(freq$n, decreasing = TRUE),]
  others<-as.vector(freq$cells[which(freq$n<freq.ll)])
  seurat@meta.data[[ref_label]] <- forcats::fct_collapse(seurat@meta.data[[ref_label]],
                                           Others=c(others))
  return(seurat@meta.data[[ref_label]])
}

for (a in SingleR_names) {
  seurat@meta.data[[a]] <- collapse_other(seurat,
                                                  ref_label=a,
                                                  freq.ll=params$ann_collapse_n,
                                                  cells.list=cells.list)
}
if ("HPCA" %in% params$anno_db) {
  seurat@meta.data$SingleR_HPCA <- as.factor(seurat@meta.data$SingleR_HPCA)
  seurat@meta.data$SingleR_HPCA <- forcats::fct_collapse(seurat@meta.data$SingleR_HPCA, 
                                                         B_cell=c("B_cell", "Pro-B_cell_CD34+", "Pre-B_cell_CD34-"),
                                                         Others=c("Others", "Embryonic_stem_cells", "GMP",
                                                                  "Hepatocytes", "iPS_cells", "Keratinocytes",
                                                                  "MSC", "Neuroepithelial_cell", "Neurons"),
                                                         HSC=c("HSC_-G-CSF", "HSC_CD34+"),
                                                         Fibroblasts=c("Fibroblasts", "Chondrocytes",
                                                                       "Smooth_muscle_cells",
                                                                       "Tissue_stem_cells"),
                                                         Myelocyte=c("Myelocyte", "Pro-Myelocyte"))
    
}
if ("hpca" %in% params$anno_db) {
 seurat@meta.data$SingleR_hpca <- as.factor(seurat@meta.data$SingleR_hpca)
  seurat@meta.data$SingleR_hpca <- forcats::fct_collapse(seurat@meta.data$SingleR_hpca, 
                                                         B_cell=c("B_cell", "Pro-B_cell_CD34+", "Pre-B_cell_CD34-"),
                                                         Others=c("Others", "Embryonic_stem_cells", "GMP",
                                                                  "Hepatocytes", "iPS_cells", "Keratinocytes",
                                                                  "MSC", "Neuroepithelial_cell", "Neurons"),
                                                         HSC=c("HSC_-G-CSF", "HSC_CD34+"),
                                                         Fibroblasts=c("Fibroblasts", "Chondrocytes",
                                                                       "Smooth_muscle_cells",
                                                                       "Tissue_stem_cells"),
                                                          Myelocyte=c("Myelocyte", "Pro-Myelocyte"))
}
if ("ImmGen" %in% params$anno_db) {
    seurat@meta.data$SingleR_ImmGen <- as.character(seurat@meta.data$SingleR_ImmGen)
    seurat@meta.data$SingleR_ImmGen <- if_else(seurat@meta.data$SingleR_ImmGen=="B cells, pro", 
                                               "B cells",
                                                seurat@meta.data$SingleR_ImmGen)
}

Pie chart

Frequency of different cell types

dat <- as.data.table(seurat@meta.data)
ann <- paste0("SingleR_", params$pref_ann)

dt <- dat[, .N, by=ann] 
dt <- dt[order(dt[[ann]]),]


if (ann %chin% names(ref.cols)) {
  pref_ann.cols <- ref.cols[[ann]]
} else {
  pref_ann.cols <- colorRampPalette(pals::watlington(n=16))(nrow(dt))
  names(pref_ann.cols) <- dt[[ann]]
}

p <- ggplot(dt, aes(x="", y=N, fill=.data[[ann]]))
p <- p + geom_bar(stat="identity", width=1, color="white")
p <- p + coord_polar("y", start=0)
p <- p + scale_fill_manual(values=pref_ann.cols)
p <- p + theme_void()  
p

PCA

seurat <- RunPCA(seurat)

Elbow plot

The elbow plot gives us an idea of how many PCs will be significant. When the plot plateaus, that is the point of diminishing returns.

dat <- data.frame(`Standard_deviation`=seurat@reductions[["pca"]]@stdev,
                  PC=c(1:length(seurat@reductions[["pca"]]@stdev)))
p <- ggplot(data=dat, aes(x=PC, y=Standard_deviation))
p <- p + geom_bar(stat="identity", fill="#6AAAB7", color=NA)
p <- p + theme_sara()
p <- p + labs(y="Standard deviation")
p

Dimension loadings

The dimension loadings give us an idea of the top genes contributing to each PC.

plot.list <- VizDimLoadings(seurat, dims=1:9, ncol=3, combine=FALSE, col=NA)
for (i in 1:length(plot.list)) {
  pc <- paste("PC", i, sep="_")
  #scale_lim <- max(a)
  p <- plot.list[[i]]
  xscale_lim <- max(abs(p$data[[pc]]))
  p <- p + geom_bar(stat="identity", aes(fill=.data[[pc]])) 
  p<- p + scale_fill_gradient2(low=unikn_blue_red.cols[1], 
                               mid="white",
                               high=unikn_blue_red.cols[3],
                               limits=c(-xscale_lim, xscale_lim))
  p <- p + theme_sara() 
  p <- p + theme(axis.title = element_blank(),
                 axis.text.x=element_text(angle=90))
  plot.list[[i]] <- p
}

cowplot::plot_grid(plotlist=plot.list,
                   align="vh",
                   labels="AUTO",
                   label_size=12,
                   ncol=3)

Clustering & UMAP

seurat <- RunUMAP(seurat, dims = 1:30, reduction.key="UMAP_")
seurat <- FindNeighbors(seurat, dims = 1:30)
# To-do parallelize, Seurat parallelization is not good.
for (alg in params$cluster_algorithm) {
  if (alg=="Louvain2") {
    seurat <- FindClusters(seurat, algorithm=2, 
                           resolution=params$cluster_resolution,
                           verbose=FALSE)
    names(seurat@meta.data) <- gsub(std_prefix, "Louvain2_", names(seurat@meta.data))
  }
  if (alg=="Leiden") {
     seurat <- FindClusters(seurat, algorithm = 4, 
                            resolution=params$cluster_resolution, 
                            method="igraph",
                            verbose=FALSE)
    names(seurat@meta.data) <- gsub(std_prefix, "Leiden_", names(seurat@meta.data))
  }
  if (alg=="Louvain") {
    seurat <- FindClusters(seurat, algorithm=1, 
                           resolution=params$cluster_resolution,
                           verbose=FALSE)
    names(seurat@meta.data) <- gsub(std_prefix, "Louvain_", names(seurat@meta.data))
  }
  if (alg=="SLM") {
    seurat <- FindClusters(seurat, algorithm=3, 
                           resolution=params$cluster_resolution,
                           verbose=FALSE)
    names(seurat@meta.data) <- gsub(std_prefix, "SLM_", names(seurat@meta.data))
  }
}
saveRDS(seurat, file=final_seurat)

QC of clustering and annotation

How well do clusters agree with annotation?

We calculate the adjusted rand index (ARI) for clusters vs annotations, which ranges from 0 to 1. Larger values indicate a better fit. Ideally, we would hope for an ARI above at least 0.5, although this can vary with the single-cell data and availability of suitable references.

cluster.names <- names(seurat@meta.data)[grep("Leiden|Louvain", names(seurat@meta.data))]
names(cluster.names) <- cluster.names
ARI.list <- lapply(cluster.names, 
                   function(c) lapply(SingleR_names, 
                                      function(a) mclust::adjustedRandIndex(seurat@meta.data[[a]],
                                                                            seurat@meta.data[[c]])))
ARI.list <- unlist(ARI.list, recursive=FALSE)
ARI.dt <- data.table(ID=names(ARI.list),
                     ARI=unlist(ARI.list))

ARI.dt <- ARI.dt[, c("Cluster") := tstrsplit(ID, "_", keep=1)]
ARI.dt <- ARI.dt[, c("Resolution", "Annotation") := tstrsplit(ID, "_", keep=c(2,3))]
ARI.dt$Resolution <- gsub(".SingleR", "", ARI.dt$Resolution)
ARI.dt$Annotation <- paste0("SingleR_", ARI.dt$Annotation)
ARI.dt$cluster_res <- paste(ARI.dt$Cluster, ARI.dt$Resolution, sep="_")
num_ann <- length(levels(as.factor(ARI.dt$Annotation)))
fw <- (num_ann*2.5)+1
nr <- round((num_ann+.1)/2, digits=0)
fh <- (nr*2.5)+.5
# karpfenblau2 "#B4BCD6", "#8290BB",  "#586BA4", "#3E5496","#324376
res.cols <- colorRampPalette(pal_karpfenblau)(length(params$cluster_resolution))
names(res.cols) <- levels(as.character(params$cluster_resolution))
p1 <- ggplot(data=ARI.dt, aes(x=Cluster, y=ARI, fill=as.character(Resolution)))
p1 <- p1 + geom_bar(stat="identity", color="white", position=position_dodge(preserve="total")) + scale_fill_manual(name="Resolution", values=res.cols)
p1 <- p1 + ylim(0,1)
p1 <- p1 + facet_wrap(~Annotation, scale="free_x", nrow=nr)
p1 <- p1 + theme_sara() + theme(strip.text = element_text(size=11))
p1

# Pick top ARI regardless of preferred annotation
setorder(ARI.dt, -ARI)
ARI.dt2 <- ARI.dt[, c("Annotation", "Cluster", "Resolution", "ARI"), with=FALSE]
ARI.dt2$ARI <- round(ARI.dt2$ARI, digits=4)
kable(ARI.dt2, booktabs = TRUE) %>%
  kable_styling(font_size = 12)
Annotation Cluster Resolution ARI
SingleR_ImmGen Louvain2 0.2 0.2416
SingleR_ImmGen Louvain2 0.15 0.2407
SingleR_hpca Louvain2 0.15 0.2370
SingleR_hpca Louvain2 0.2 0.2329
SingleR_ImmGen Louvain2 0.5 0.1369
SingleR_hpca Louvain2 0.5 0.1066
SingleR_ImmGen Louvain2 0.8 0.0703
SingleR_ImmGen Louvain2 1.1 0.0558
SingleR_hpca Louvain2 0.8 0.0554
SingleR_hpca Louvain2 1.1 0.0446
SingleR_ImmGen Louvain2 1.4 0.0425
SingleR_hpca Louvain2 1.4 0.0310
top_ann <- ARI.dt$Annotation[1]
top_clust_res <- ARI.dt$cluster_res[1]

if (top_ann != pref_ann) {
  pref.ARI.dt <- ARI.dt[Annotation==pref_ann]
  setorder(pref.ARI.dt, -ARI)
  pref_clust_res <- pref.ARI.dt$cluster_res[1]
  poor_choice=TRUE
} else {
  pref_clust_res <- top_clust_res
  poor_choice=FALSE
}

Comparison of UMAP by cluster vs annotation

UMAP clusters
Inspecting the UMAP This will give us an idea of how many clusters and how well separated. Look for the cells to stay within their cluster.

top_clust.cols <- colorRampPalette(carto_pal(n=12, "Vivid"))(length(levels(as.factor(seurat@meta.data[[top_clust_res]]))))
names(top_clust.cols) <- levels(as.factor(seurat@meta.data[[top_clust_res]]))


p <- DimPlot(seurat, group.by=top_clust_res) 
p <- p + scale_color_manual(values=top_clust.cols)
p <- p + theme_sara()
p

UMAPs

Look for cells to be clustered well by cell type.

Top annotation

A UMAP of top annotation shows how well the clusters and annotation match up.

if (top_ann %chin% names(ref.cols)) {
  top_ann.cols <- ref.cols[[top_ann]]
} else {
   top_ann.cols <- colorRampPalette(pals::alphabet2(n=26))(nrow(dt))
   names(top_ann.cols) <- levels(as.factor(seurat@meta.data[[top_ann]]))

}

p <- DimPlot(seurat, group.by=top_ann) 
p <- p + scale_color_manual(values=top_ann.cols)
p <- p + theme_sara()
p

Preferred annotation

If the top annotation based on ARI is not the preferred annotation, the preferred annotation is plotted here.

p <- DimPlot(seurat, group.by=pref_ann) 
p <- p + scale_color_manual(values=pref_ann.cols)
p <- p + theme_sara()
p <- p + theme(plot.title=element_blank())
p

Barplots

The annotation barplot is another visualization of cluster/annotation agreement. Bars should be mostly one (or two) kinds of cells, although this depends on the granularity of the annotation.

Top annotation

Barplot of best-scoring annotation according to ARI.

dat <- seurat@meta.data
p <- ggplot(data=dat, aes(x=.data[[top_clust_res]], fill=.data[[top_ann]]))
p <- p + geom_bar(position="fill", color="black")
p <- p + theme_sara_90()
p <- p + scale_fill_manual("", values=top_ann.cols)
p <- p + labs(y="Proportion")
p <- p + theme(axis.title.x=element_blank(),
               axis.text=element_text(size=9),
               legend.text=element_text(size=9),
               legend.title=element_text(size=9),
               axis.title.y=element_text(size=10),
               strip.text=element_text(size=10),
               legend.position="bottom")
p

Preferred annotation

The preferred annotation is show here, with it’s best performing clustering, if it is different from top.

 dat <- seurat@meta.data
p <- ggplot(data=dat, aes(x=.data[[pref_clust_res]], fill=.data[[pref_ann]]))
p <- p + geom_bar(position="fill", color="black")
p <- p + theme_sara_90()
p <- p + scale_fill_manual("", values=pref_ann.cols)
p <- p + labs(y="Proportion")
p <- p + theme(axis.title.x=element_blank(),
               axis.text=element_text(size=9),
               legend.text=element_text(size=9),
               legend.title=element_text(size=9),
               axis.title.y=element_text(size=10),
               strip.text=element_text(size=10),
               legend.position="bottom")
p

Cluster markers

If the cluster and annotations don’t match up well, it can be helpful to look at the markers that differentiate the clusters. Cell cycle markers, for example, may be problematic.

Idents(seurat) <- seurat@meta.data[[top_clust_res]]
markers <- FindAllMarkers(seurat, 
                          only.pos = TRUE,
                          min.pct = 0.3, 
                          logfc.threshold = .5,
                          max.cells.per.ident=5000,
                          random.seed = 888)
markers %>%
    group_by(cluster) %>%
    top_n(n = 5, wt = avg_log2FC) -> top5
marker.genes <- top5$gene
seurat.hm <- subset(seurat, downsample=1000)
mat<- seurat.hm[["RNA"]]@data[marker.genes, ] %>% as.matrix()
mat<- log2(mat+1)
#mat <- seurat.hm[["RNA"]]@scale.data[marker.genes,] %>% as.matrix() # use this and dont log to get DoHeatmap style
# cell colors
cell_anno<- seurat.hm@meta.data[[pref_ann]]
cell.levels <- levels(droplevels(as.factor(cell_anno)))
c.cols <- pref_ann.cols[names(pref_ann.cols) %in% cell.levels]

# cluster colors
clust_anno <- as.character(seurat.hm@meta.data[[top_clust_res]])
clust_levels <- levels((droplevels(as.factor(clust_anno))))
clust.cols <- colorRampPalette(carto_pal(n=12, name="Vivid"))(length(clust_levels))
names(clust.cols) <- clust_levels
  

# cluster rowcolors
clust_anno2 <- as.character(top5$cluster)
clust_levels <- levels((droplevels(as.factor(clust_anno2))))
clust.cols2 <- clust.cols


ta <- ComplexHeatmap::HeatmapAnnotation(`Cluster`=clust_anno,
                                        `Cell type`=cell_anno,
                                         col=list(`Cell type`=c.cols,
                                                  `Cluster`=clust.cols))

ra <- ComplexHeatmap::rowAnnotation(`Cluster`=clust_anno2,
                                     col=list(`Cluster`=clust.cols2),
                                     show_legend=FALSE)

heatmap2.colfun <- microViz::heat_palette(palette = "Cividis",
                                          breaks = 100,
                                          range = c(0,max(mat)),
                  sym = FALSE,
                  rev = FALSE)

ComplexHeatmap::Heatmap(mat, name = "Expression",  
                        cluster_columns = FALSE,
                        cluster_rows=FALSE,
                        column_split=seurat.hm@meta.data[[top_clust_res]],
                        row_split=top5$cluster,
                        row_names_gp = grid::gpar(fontsize = 11),
                        column_title=character(0),
                        column_gap = unit(0.5, "mm"),
                        col = heatmap2.colfun,
                        top_annotation = ta,
                        show_column_names = FALSE,
                        left_annotation = ra)

Clusters by sample

This may be biologically ok, as long as the clusters also do not have high percent.mt, and/or low or high numbers of genes or UMIs

UMAP

Inspect the UMAP, does it look like some samples dominate clusters?

p <- DimPlot(seurat, group.by="SampleID") 
p <- p + scale_color_manual(values=sample.cols)
p <- p + theme_sara()
p

Heatmap

The heatmap provides an alternative means of examining cluster/sample assignment.

tab<-prop.table(table(seurat@meta.data$SampleID, seurat@meta.data[[top_clust_res]]),2)

nr <- length(levels(as.factor(seurat@meta.data$SampleID)))
nc <- length(levels(as.factor(seurat@meta.data[[top_clust_res]])))


ComplexHeatmap::Heatmap(tab,
                        cluster_columns = TRUE,
                        show_column_dend = TRUE,
                        row_names_gp = grid::gpar(fontsize = 12),
                        column_names_gp = grid::gpar(fontsize=12),
                        cluster_rows = TRUE,
                        show_row_dend = FALSE,
                        col = heatmap.colfun,
                        height = unit(5, "mm")*nr,
                        width = unit(5, "mm")*nc,
                        name="Proportion")

Annotations by sample

We may have expectations about the expected proportion of cells in the dataset for certain cell types. Of course, we could be wrong, which might be fun, or not.

Top annotation

dat <- seurat@meta.data
p <- ggplot(data=dat, aes(x=SampleID, fill=.data[[top_ann]]))
p <- p + geom_bar(position="fill", color="black")
p <- p + theme_sara_90()
p <- p + scale_fill_manual("", values=top_ann.cols)
p <- p + labs(y="Proportion")
p <- p + theme(axis.title.x=element_blank(),
               axis.text=element_text(size=9),
               legend.text=element_text(size=9),
               legend.title=element_text(size=9),
               axis.title.y=element_text(size=10),
               strip.text=element_text(size=10),
               legend.position="bottom")
p

Preferred annotation

If the top annotation based on ARI is not the preferred annotation, the preferred annotation is plotted here.

dat <- seurat@meta.data
p <- ggplot(data=dat, aes(x=SampleID, fill=.data[[pref_ann]]))
p <- p + geom_bar(position="fill", color="black")
p <- p + theme_sara_90()
p <- p + scale_fill_manual("", values=pref_ann.cols)
p <- p + labs(y="Proportion")
p <- p + theme(axis.title.x=element_blank(),
               axis.text=element_text(size=9),
               legend.text=element_text(size=9),
               legend.title=element_text(size=9),
               axis.title.y=element_text(size=10),
               strip.text=element_text(size=10),
               legend.position="bottom")
p

Is there an effect of cell cycle?

There may be some structure in the UMAP due to cell cycle, it is also a good idea to check the PCs and cluster markers.

p <- DimPlot(seurat, group.by="Phase") 
p <- p + scale_color_manual(values=cell_cycle.cols)
p <- p + theme_sara()
p

Cell cycle heatmaps by cluster and annotation

If cell cycle is evaluated, the results will be plotted here.

Cluster

tab<-prop.table(table(seurat@meta.data$Phase, seurat@meta.data[[top_clust_res]]),2)

nr <- length(levels(as.factor(seurat@meta.data$Phase)))
nc <- length(levels(as.factor(seurat@meta.data[[top_clust_res]])))


ComplexHeatmap::Heatmap(tab,
                        cluster_columns = TRUE,
                        show_column_dend = TRUE,
                        row_names_gp = grid::gpar(fontsize = 12),
                        column_names_gp = grid::gpar(fontsize=12),
                        cluster_rows = TRUE,
                        show_row_dend = FALSE,
                        col = heatmap.colfun,
                        height = unit(5, "mm")*nr,
                        width = unit(5, "mm")*nc,
                        name="Proportion")

Annotation

tab<-prop.table(table(seurat@meta.data$Phase, seurat@meta.data[[top_ann]]),2)

nr <- length(levels(as.factor(seurat@meta.data$Phase)))
nc <- length(levels(as.factor(seurat@meta.data[[top_ann]])))


ComplexHeatmap::Heatmap(tab,
                        cluster_columns = TRUE,
                        show_column_dend = TRUE,
                        row_names_gp = grid::gpar(fontsize = 12),
                        column_names_gp = grid::gpar(fontsize=12),
                        cluster_rows = TRUE,
                        show_row_dend = FALSE,
                        col = heatmap.colfun,
                        height = unit(5, "mm")*nr,
                        width = unit(5, "mm")*nc,
                        name="Proportion")

Cell metrics effects

First we will look at the UMAP for visual inspection. Extremely low or high UMI/genes in a cluster or high percent.mt in a cluster may be problematic.

feature_plot <- function(seurat, feature) {
  p <- ggplot(data=as.data.frame(seurat@reductions$umap@cell.embeddings), 
              aes(x=UMAP_1, y=UMAP_2, 
                  color=seurat@meta.data[[feature]]))
  p <- p + geom_point(size=0.1)
  p <- p + theme_sara()
  if(feature == "percent.mt") {
      p <- p + scale_color_viridis_c(option="rocket", 
                                 direction=-1)
  } else {
    p <- p + scale_color_viridis_c(option="rocket", 
                                 direction=-1,
                                 labels=scales::label_scientific())
  }
  p <- p + theme(axis.title.y.right = element_blank(),
                 legend.position="right",
                 legend.title=element_blank())
  p <- p + labs(title=feature)
return(p)
}

p.list <- sapply(names(features), 
                 function(feature) feature_plot(seurat, feature),
                 simplify=FALSE, USE.NAMES=TRUE)

cowplot::plot_grid(plotlist=p.list,
                   align="vh",
                   labels="AUTO",
                   label_size=12,
                   ncol=2,
                   nrow=2)

Cell metrics ridgeplots

Clusters

It may be helpful to investigate further clusters that have multiple cell types.

p1 <- RidgePlot(seurat, group.by=top_clust_res, features="nFeature_RNA", same.y.lims=TRUE, sort=TRUE) 
p1 <- p1 + scale_fill_manual(values=top_clust.cols)
p1 <- p1 + theme(legend.position="none", 
                 axis.title.y=element_blank(),
                 axis.text.x=element_text(size=11))
p1 <- p1 + scale_x_continuous(labels = scales::scientific)

p2 <- RidgePlot(seurat, group.by=top_clust_res, features="nCount_RNA", same.y.lims=TRUE, sort=TRUE, log=TRUE)
p2 <- p2 + scale_fill_manual(values=top_clust.cols)
p2 <- p2 + theme(legend.position="none", 
                 axis.text=element_text(size=11),
                 axis.title.y=element_blank())
p2 <- p2 + scale_x_continuous(labels = scales::scientific)

mt.ul <- max(seurat@meta.data$percent.mt)+5
p3 <- RidgePlot(seurat, group.by=top_clust_res, features="percent.mt", y.max=50, sort=TRUE)
p3 <- p3 + scale_fill_manual(values=top_clust.cols)
p3 <- p3 + xlim(0, mt.ul)
p3 <- p3 + theme(legend.position="none", 
                 axis.title.y=element_blank(),
                 axis.text=element_text(size=11))

prow <- cowplot::plot_grid(p1,
                           p2,
                           p3,
                           align = 'vh',
                           labels = "AUTO",
                           hjust = -1,
                           nrow = 1)
prow

Annotation

p1 <- RidgePlot(seurat, group.by=top_ann, features="nFeature_RNA", same.y.lims=TRUE, sort=TRUE) 
p1 <- p1 + scale_fill_manual(values=top_ann.cols)
p1 <- p1 + theme(legend.position="none", axis.title.y=element_blank())

p2 <- RidgePlot(seurat, group.by=top_ann, features="nCount_RNA", same.y.lims=TRUE, sort=TRUE, log=TRUE)
p2 <- p2 + scale_fill_manual(values=top_ann.cols)
p2 <- p2 + theme(legend.position="none", axis.title.y=element_blank())

p3 <- RidgePlot(seurat, group.by=top_ann, features="percent.mt", y.max=50, sort=TRUE)
p3 <- p3 + scale_fill_manual(values=top_ann.cols)
p3 <- p3 + theme(legend.position="none", axis.title.y=element_blank())
p3 <- p3 + xlim(0, percent.mt.ul+2)

prow <- cowplot::plot_grid(p1,
                           p2,
                           p3,
                           align = 'vh',
                           labels = "AUTO",
                           hjust = -1,
                           nrow = 1)
prow

Doublet exploration

Section below will only be evaluated if doublet detection occurred. It is recommended to run first with doublet detection, then with doublet filtering as needed. The hybrid approach from scds package is utilized for computational efficiency. Although scds provides a “call” based on the score, it may be more useful to view the distribution of the score. For visualization purposes, we will call anything with a score of 1 or greater a doublet.

UMAPs

Doublet score

 p <- FeaturePlot(seurat, features="hybrid_score") 
p <- p + scale_color_viridis_c(option="rocket", direction=-1)
p <- p + theme_sara()
p

Doublet call


- eDoublet refers to cells estimated to be a doublet with a hybrid score greater than or equal to 1
- eSingletrefers to cells estimated to be a singlet with a hybrid score less than 1

p <- DimPlot(seurat, group.by="hybrid_call") 
p <- p + theme_sara()
p <- p + scale_color_manual(values=dbl.cols)
p <- p + theme(plot.title=element_blank())
p

Clusters

Assessing the clusters will help us determine if there are any clusters prone to doublets. Clusters with doublets are good targets for removal.

Ridgeplot

p4 <- RidgePlot(seurat, group.by=top_clust_res, features="hybrid_score", sort=TRUE)
p4 <- p4 + scale_fill_manual(values=top_clust.cols)
p4

Heatmap

tab<-prop.table(table(seurat@meta.data$hybrid_call, seurat@meta.data[[top_clust_res]]),2)

nr <- length(levels(as.factor(seurat@meta.data$hybrid_call)))
nc <- length(levels(as.factor(seurat@meta.data[[top_clust_res]])))


ComplexHeatmap::Heatmap(tab,
                        cluster_columns = TRUE,
                        show_column_dend = TRUE,
                        row_names_gp = grid::gpar(fontsize = 12),
                        column_names_gp = grid::gpar(fontsize=12),
                        cluster_rows = TRUE,
                        show_row_dend = FALSE,
                        col = heatmap.colfun,
                        height = unit(5, "mm")*nr,
                        width = unit(5, "mm")*nc,
                        name="Proportion")

Density

p <- ggplot(data=seurat@meta.data, aes(x=hybrid_score, y=nCount_RNA))
p <- p + geom_pointdensity()
p <- p + scale_color_nord(palette="lumina", discrete=FALSE, reverse=TRUE)
p <- p + facet_wrap(~.data[[top_clust_res]])
p

Doublets and annotation

Ridgeplot

p4 <- RidgePlot(seurat, group.by=pref_ann, features="hybrid_score", sort=TRUE)
p4 <- p4 + scale_fill_manual(values=pref_ann.cols)
p4

Heatmap

tab<-prop.table(table(seurat@meta.data$hybrid_call, seurat@meta.data[[pref_ann]]),2)

nr <- length(levels(as.factor(seurat@meta.data$hybrid_call)))
nc <- length(levels(as.factor(seurat@meta.data[[pref_ann]])))


ComplexHeatmap::Heatmap(tab,
                        cluster_columns = TRUE,
                        show_column_dend = TRUE,
                        row_names_gp = grid::gpar(fontsize = 12),
                        column_names_gp = grid::gpar(fontsize=12),
                        cluster_rows = TRUE,
                        show_row_dend = FALSE,
                        col = heatmap.colfun,
                        height = unit(5, "mm")*nr,
                        width = unit(5, "mm")*nc,
                        name="Proportion")

Density: Annotation - nCount

p <- ggplot(data=seurat@meta.data, aes(x=hybrid_score, y=nCount_RNA))
p <- p + geom_pointdensity()
p <- p + scale_color_nord(palette="lumina", discrete=FALSE, reverse=TRUE)
p <- p + facet_wrap(~.data[[pref_ann]])
p

Density: Annotation - nFeature

p <- ggplot(data=seurat@meta.data, aes(x=hybrid_score, y=nFeature_RNA))
p <- p + geom_pointdensity()
p <- p + scale_color_nord(palette="lumina", discrete=FALSE, reverse=TRUE)
p <- p + facet_wrap(~.data[[pref_ann]])
p

Density: Annotation - percent.mt

p <- ggplot(data=seurat@meta.data, aes(x=hybrid_score, y=percent.mt))
p <- p + geom_pointdensity()
p <- p + scale_color_nord(palette="lumina", discrete=FALSE, reverse=TRUE)
p <- p + facet_wrap(~.data[[pref_ann]])
p

Cell cycle

If cycle cycle is evaluated, results will be plotted here.

Ridgeplot

p4 <- RidgePlot(seurat, group.by="Phase", features="hybrid_score", sort=TRUE)
p4 <- p4 + scale_fill_manual(values=cell_cycle.cols)
p4

Boxplots

p <- ggplot(data=seurat@meta.data, aes(x=Phase, y=hybrid_score, fill=Phase))
p <- p + geom_boxplot()
p <- p + scale_fill_manual(values=cell_cycle.cols)
p <- p + facet_wrap(~.data[[pref_ann]])
p

Decontamination exploration

Section below will only be evaluated if decontamination detection occurred. It is recommended to run first with decontamination detection, then with decontamination filtering as needed.

UMAPs

Decontamination percent

p <- FeaturePlot(seurat, features="decontX_contamination") 
p <- p + scale_color_viridis_c(option="rocket", direction=-1)
p <- p + theme_sara()
p

Decontamination threshold

A threshold of 0.6 was utilized.

p <- DimPlot(seurat, group.by="contam_status") 
p <- p + theme_sara()
p <- p + scale_color_manual(values=dcx.cols)
p <- p + theme(plot.title=element_blank())
p

Clusters

Assessing the clusters will help us determine if there are any clusters prone to ambient RNA contamination

Ridgeplot

p4 <- RidgePlot(seurat, group.by=top_clust_res, features="decontX_contamination", sort=TRUE)
p4 <- p4 + scale_fill_manual(values=top_clust.cols)
p4

Heatmap

tab<-prop.table(table(seurat@meta.data$contam_status, seurat@meta.data[[top_clust_res]]),2)

nr <- length(levels(as.factor(seurat@meta.data$contam_status)))
nc <- length(levels(as.factor(seurat@meta.data[[top_clust_res]])))


ComplexHeatmap::Heatmap(tab,
                        cluster_columns = TRUE,
                        show_column_dend = TRUE,
                        row_names_gp = grid::gpar(fontsize = 12),
                        column_names_gp = grid::gpar(fontsize=12),
                        cluster_rows = TRUE,
                        show_row_dend = FALSE,
                        col = heatmap.colfun,
                        height = unit(5, "mm")*nr,
                        width = unit(5, "mm")*nc,
                        name="Proportion")

Density

p <- ggplot(data=seurat@meta.data, aes(x=decontX_contamination, y=nCount_RNA))
p <- p + geom_pointdensity()
p <- p + scale_color_nord(palette="lumina", discrete=FALSE, reverse=TRUE)
p <- p + facet_wrap(~.data[[top_clust_res]])
p

Contamination and annotation

Ridgeplot

p4 <- RidgePlot(seurat, group.by=pref_ann, features="decontX_contamination", sort=TRUE)
p4 <- p4 + scale_fill_manual(values=pref_ann.cols)
p4

Heatmap

tab<-prop.table(table(seurat@meta.data$contam_status, seurat@meta.data[[pref_ann]]),2)

nr <- length(levels(as.factor(seurat@meta.data$contam_status)))
nc <- length(levels(as.factor(seurat@meta.data[[pref_ann]])))


ComplexHeatmap::Heatmap(tab,
                        cluster_columns = TRUE,
                        show_column_dend = TRUE,
                        row_names_gp = grid::gpar(fontsize = 12),
                        column_names_gp = grid::gpar(fontsize=12),
                        cluster_rows = TRUE,
                        show_row_dend = FALSE,
                        col = heatmap.colfun,
                        height = unit(5, "mm")*nr,
                        width = unit(5, "mm")*nc,
                        name="Proportion")

Density: Annotation - nCount

p <- ggplot(data=seurat@meta.data, aes(x=decontX_contamination, y=nCount_RNA))
p <- p + geom_pointdensity()
p <- p + scale_color_nord(palette="lumina", discrete=FALSE, reverse=TRUE)
p <- p + facet_wrap(~.data[[pref_ann]])
p

Density: Annotation - nFeature

p <- ggplot(data=seurat@meta.data, aes(x=decontX_contamination, y=nFeature_RNA))
p <- p + geom_pointdensity()
p <- p + scale_color_nord(palette="lumina", discrete=FALSE, reverse=TRUE)
p <- p + facet_wrap(~.data[[pref_ann]])
p

Density: Annotation - percent.mt

p <- ggplot(data=seurat@meta.data, aes(x=decontX_contamination, y=percent.mt))
p <- p + geom_pointdensity()
p <- p + scale_color_nord(palette="lumina", discrete=FALSE, reverse=TRUE)
p <- p + facet_wrap(~.data[[pref_ann]])
p

Cell cycle

If cycle cycle is evaluated, results will be plotted here.

Ridgeplot

p4 <- RidgePlot(seurat, group.by="Phase", features="decontX_contamination", sort=TRUE)
p4 <- p4 + scale_fill_manual(values=cell_cycle.cols)
p4

Boxplots

p <- ggplot(data=seurat@meta.data, aes(x=Phase, y=decontX_contamination, fill=Phase))
p <- p + geom_boxplot()
p <- p + scale_fill_manual(values=cell_cycle.cols)
p <- p + facet_wrap(~.data[[pref_ann]])
p

Covariate analysis

If experiment metadata was provided, it will be utilized here. Median PC loading for all cells per sample will be calculated for the first 15 PCs. An ANOVA (Kruskal-Wallis rank sum test) will be performed with categorical variables and a glm will be performed on all numeric variables. Nominal p-values are shown.

metadata.df <- as.data.frame(data.table::fread(params$exp_metadata))
metadata.df$SampleID <- metadata.df[[params$sample_id]]
names(metadata.df) <- janitor::make_clean_names(names(metadata.df))
setnames(metadata.df, "sample_id", "SampleID")
meta_var <- names(metadata.df)[!names(metadata.df) %chin% c(params$sample_id, "SampleID")]

# For testing
#cluster_algs <- c("Louvain")
#top_clust_res <- "Louvain2_0.15"
#pref_ann <- "SingleR_ImmGen"

# For use
cluster_algs <- params$cluster_algorithm

seur.meta <- as.data.frame(seurat@meta.data)
seur.meta$Clusters <- seur.meta[top_clust_res]
seur_var <- names(seur.meta)
seur_var <- seur_var[grep("ident|pruned|rmv|Filter|seurat_clusters", seur_var, invert=TRUE)]
seur_var <- seur_var[!seur_var %like% cluster_algs]
covariates <- c(meta_var, seur_var)
seur.meta <- seur.meta %>% dplyr::select(dplyr::all_of(seur_var))
setnames(seur.meta, c("S.Score", "G2M.Score"), c("s_score", "g2m_score"))

all.meta <- merge(seur.meta, metadata.df, by="SampleID")
all.meta$rn <- rownames(seur.meta)
covariate_PCA_analysis <- function(seurat, metadata, numPCs, covariates) {
  covariate.types <- sapply(metadata, class)
  covariate.types <- gsub("character", "factor", covariate.types)
  covariate.types <- covariate.types[names(covariate.types) %chin% covariates]
  cat_var <- covariate.types[grep("factor", covariate.types)]
  cat_var <- cat_var[grep("rn", names(cat_var), invert=TRUE)]
  num_var <- covariate.types[grep("factor", covariate.types, invert=TRUE)]
  pca.df <- as.data.frame(seurat@reductions$pca@cell.embeddings)[,1:numPCs]
  pca.df$rn <- rownames(pca.df)
  sample.df <- metadata[, c("rn", "SampleID")]
  pca.df <- merge(pca.df, sample.df, by="rn", all.x=TRUE, all.y=FALSE)
  pca.df$rn <- NULL
  median.df <- pca.df %>% 
                     dplyr::group_by(SampleID) %>%
                     summarize_all(list(median), na.rm=TRUE)
  pca_meta.df <- merge(median.df, metadata, by="SampleID", all.x=TRUE, all.y=FALSE)
  pca_meta.df <- pca_meta.df[!duplicated(pca_meta.df$SampleID),]
  endCol <- length(names(pca_meta.df))
  ann.df <- pca_meta.df[,1:numPCs+1]
  anova_p.list <- sapply(names(cat_var), 
                            function(f) apply(ann.df, 
                                              2, 
                                              function(x) kruskal.test(x ~ pca_meta.df[[f]])$p.value),
                         simplify=FALSE, USE.NAMES = TRUE)
  anova_res.pval <- plyr::ldply(anova_p.list, .id="Factor")
  
  
  glm_p.list <- sapply(names(num_var), 
                            function(f) apply(ann.df, 
                                              2, 
                                              function(x) summary(glm(x ~ as.numeric(pca_meta.df[[f]])))$coefficients[2,4]),
                         simplify=FALSE, USE.NAMES = TRUE)
  glm_res.pval <- plyr::ldply(glm_p.list, .id="Factor")
  results <- rbindlist(list(glm_res.pval, anova_res.pval))
  return(results)
}
covariate.results <- covariate_PCA_analysis(seurat=seurat, 
                                        metadata=all.meta, 
                                        numPCs=15, 
                                        covariates=meta_var)
cov.melt <- data.table::melt(covariate.results, 
                             id.vars=c("Factor"),
                             variable.name="PC",
                             value="pvalue")
cov.melt$PC <- gsub("_", "", cov.melt$PC)
cov.melt$PC_x <- gsub("PC", "", cov.melt$PC)
cov.melt$PC_x <- as.numeric(cov.melt$PC_x)
cov.melt$Significance <- ifelse(cov.melt$pvalue <= 0.05, "SIG", NA)
p <- ggplot(cov.melt, aes(x=PC_x, y=Factor, fill=-log10(pvalue), color=Significance))
p <- p + geom_tile(color="white")
p <- p + geom_point(pch=8, size=2)
p <- p + scale_fill_viridis_c(option="rocket", direction=-1)
p <- p + scale_color_manual(values=c(`SIG`="yellow"), na.value=NA)
p <- p + scale_x_discrete(breaks=c(seq(1,15)), limits=c(seq(1,15)))
p <- p + theme_minimal()
p <- p + theme(axis.title=element_blank(),
               axis.text=element_text(color="black", size=11))
p

end <- Sys.time()

That’s it! Spend some time looking through the report to determine next steps.


Analysis started: 2022-04-05 04:18:01
Analysis finished: 2022-04-05 07:20:19


R Session Information

Information about R, the OS and attached or loaded packages

pander::pander(sessionInfo(), compact = FALSE)

R version 4.1.2 (2021-11-01)

Platform: x86_64-pc-linux-gnu (64-bit)

locale: LC_CTYPE=C.UTF-8, LC_NUMERIC=C, LC_TIME=C.UTF-8, LC_COLLATE=C.UTF-8, LC_MONETARY=C.UTF-8, LC_MESSAGES=C.UTF-8, LC_PAPER=C.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=C.UTF-8 and LC_IDENTIFICATION=C

attached base packages:

  • grid
  • stats4
  • stats
  • graphics
  • grDevices
  • utils
  • datasets
  • methods
  • base

other attached packages:

  • microViz(v.0.9.0)
  • ComplexHeatmap(v.2.10.0)
  • SeuratObject(v.4.0.4)
  • Seurat(v.4.1.0)
  • celda(v.1.10.0)
  • ggpointdensity(v.0.1.0)
  • nord(v.1.0.0)
  • unikn(v.0.4.0)
  • mclust(v.5.4.9)
  • ggridges(v.0.5.3)
  • scales(v.1.1.1)
  • scds(v.1.10.0)
  • kableExtra(v.1.3.4)
  • rcartocolor(v.2.0.0)
  • BiocParallel(v.1.28.3)
  • multimode(v.1.5)
  • scater(v.1.22.0)
  • patchwork(v.1.1.1)
  • glmGamPoi(v.1.6.0)
  • ensembldb(v.2.18.3)
  • AnnotationFilter(v.1.18.0)
  • GenomicFeatures(v.1.46.4)
  • AnnotationDbi(v.1.56.2)
  • AnnotationHub(v.3.2.1)
  • BiocFileCache(v.2.2.1)
  • dbplyr(v.2.1.1)
  • pals(v.1.7)
  • forcats(v.0.5.1)
  • janitor(v.2.1.0)
  • dplyr(v.1.0.8)
  • scran(v.1.22.1)
  • scuttle(v.1.4.0)
  • SingleCellExperiment(v.1.16.0)
  • biomaRt(v.2.50.3)
  • celldex(v.1.4.0)
  • SingleR(v.1.8.1)
  • SummarizedExperiment(v.1.24.0)
  • Biobase(v.2.54.0)
  • GenomicRanges(v.1.46.1)
  • GenomeInfoDb(v.1.30.1)
  • IRanges(v.2.28.0)
  • S4Vectors(v.0.32.3)
  • BiocGenerics(v.0.40.0)
  • MatrixGenerics(v.1.6.0)
  • matrixStats(v.0.61.0)
  • plyr(v.1.8.6)
  • ggplot2(v.3.3.5)
  • data.table(v.1.14.2)
  • rmarkdown(v.2.11)

loaded via a namespace (and not attached):

  • rsvd(v.1.0.5)
  • ica(v.1.0-2)
  • svglite(v.2.1.0)
  • assertive.properties(v.0.0-4)
  • Rsamtools(v.2.10.0)
  • foreach(v.1.5.2)
  • lmtest(v.0.9-39)
  • crayon(v.1.5.0)
  • rhdf5filters(v.1.6.0)
  • spatstat.core(v.2.4-0)
  • MASS(v.7.3-55)
  • backports(v.1.4.1)
  • nlme(v.3.1-155)
  • rmdformats(v.1.0.3)
  • rlang(v.1.0.1)
  • XVector(v.0.34.0)
  • ROCR(v.1.0-11)
  • irlba(v.2.3.5)
  • limma(v.3.50.1)
  • filelock(v.1.0.2)
  • xgboost(v.1.5.2.1)
  • rjson(v.0.2.21)
  • bit64(v.4.0.5)
  • glue(v.1.6.1)
  • pheatmap(v.1.0.12)
  • sctransform(v.0.3.3)
  • parallel(v.4.1.2)
  • vipor(v.0.4.5)
  • spatstat.sparse(v.2.1-0)
  • spatstat.geom(v.2.3-2)
  • tidyselect(v.1.1.2)
  • phyloseq(v.1.38.0)
  • fitdistrplus(v.1.1-6)
  • XML(v.3.99-0.8)
  • tidyr(v.1.2.0)
  • assertive.types(v.0.0-3)
  • zoo(v.1.8-9)
  • GenomicAlignments(v.1.30.0)
  • xtable(v.1.8-4)
  • magrittr(v.2.0.2)
  • evaluate(v.0.15)
  • cli(v.3.2.0)
  • zlibbioc(v.1.40.0)
  • dbscan(v.1.1-10)
  • rstudioapi(v.0.13)
  • miniUI(v.0.1.1.1)
  • bslib(v.0.3.1)
  • rpart(v.4.1.16)
  • RcppEigen(v.0.3.3.9.1)
  • maps(v.3.4.0)
  • shiny(v.1.7.1)
  • BiocSingular(v.1.10.0)
  • xfun(v.0.29)
  • clue(v.0.3-60)
  • multtest(v.2.50.0)
  • cluster(v.2.1.2)
  • biomformat(v.1.22.0)
  • KEGGREST(v.1.34.0)
  • tibble(v.3.1.6)
  • interactiveDisplayBase(v.1.32.0)
  • ggrepel(v.0.9.1)
  • ape(v.5.6-2)
  • listenv(v.0.8.0)
  • permute(v.0.9-7)
  • Biostrings(v.2.62.0)
  • png(v.0.1-7)
  • future(v.1.24.0)
  • withr(v.2.4.3)
  • bitops(v.1.0-7)
  • assertive.base(v.0.0-9)
  • pracma(v.2.3.8)
  • dqrng(v.0.3.0)
  • pROC(v.1.18.0)
  • pillar(v.1.7.0)
  • GlobalOptions(v.0.1.2)
  • cachem(v.1.0.6)
  • GetoptLong(v.1.0.5)
  • DelayedMatrixStats(v.1.16.0)
  • vctrs(v.0.3.8)
  • ellipsis(v.0.3.2)
  • generics(v.0.1.2)
  • tools(v.4.1.2)
  • beeswarm(v.0.4.0)
  • munsell(v.0.5.0)
  • DelayedArray(v.0.20.0)
  • fastmap(v.1.1.0)
  • compiler(v.4.1.2)
  • abind(v.1.4-5)
  • httpuv(v.1.6.5)
  • rtracklayer(v.1.54.0)
  • ExperimentHub(v.2.2.1)
  • plotly(v.4.10.0)
  • GenomeInfoDbData(v.1.2.7)
  • gridExtra(v.2.3)
  • enrichR(v.3.0)
  • edgeR(v.3.36.0)
  • lattice(v.0.20-45)
  • deldir(v.1.0-6)
  • utf8(v.1.2.2)
  • later(v.1.3.0)
  • jsonlite(v.1.8.0)
  • multipanelfigure(v.2.1.2)
  • ScaledMatrix(v.1.2.0)
  • pbapply(v.1.5-0)
  • sparseMatrixStats(v.1.6.0)
  • lazyeval(v.0.2.2)
  • promises(v.1.2.0.1)
  • doParallel(v.1.0.17)
  • R.utils(v.2.11.0)
  • goftest(v.1.2-3)
  • spatstat.utils(v.2.3-0)
  • reticulate(v.1.24)
  • cowplot(v.1.1.1)
  • statmod(v.1.4.36)
  • webshot(v.0.5.2)
  • Rtsne(v.0.15)
  • pander(v.0.6.4)
  • dichromat(v.2.0-0)
  • uwot(v.0.1.11)
  • igraph(v.1.2.11)
  • survival(v.3.2-13)
  • yaml(v.2.3.5)
  • systemfonts(v.1.0.4)
  • htmltools(v.0.5.2)
  • memoise(v.2.0.1)
  • BiocIO(v.1.4.0)
  • locfit(v.1.5-9.5)
  • viridisLite(v.0.4.0)
  • digest(v.0.6.29)
  • assertthat(v.0.2.1)
  • mime(v.0.12)
  • rappdirs(v.0.3.3)
  • RSQLite(v.2.2.10)
  • future.apply(v.1.8.1)
  • mapproj(v.1.2.8)
  • vegan(v.2.5-7)
  • blob(v.1.2.2)
  • R.oo(v.1.24.0)
  • styler(v.1.6.2)
  • labeling(v.0.4.2)
  • splines(v.4.1.2)
  • Rhdf5lib(v.1.16.0)
  • ProtGenerics(v.1.26.0)
  • RCurl(v.1.98-1.6)
  • assertive.numbers(v.0.0-2)
  • ks(v.1.13.4)
  • hms(v.1.1.1)
  • rhdf5(v.2.38.0)
  • colorspace(v.2.0-3)
  • BiocManager(v.1.30.16)
  • ggbeeswarm(v.0.6.0)
  • shape(v.1.4.6)
  • assertive.files(v.0.0-2)
  • sass(v.0.4.0)
  • Rcpp(v.1.0.8)
  • bookdown(v.0.24)
  • RANN(v.2.6.1)
  • mvtnorm(v.1.1-3)
  • circlize(v.0.4.14)
  • fansi(v.1.0.2)
  • parallelly(v.1.30.0)
  • R6(v.2.5.1)
  • lifecycle(v.1.0.1)
  • rootSolve(v.1.8.2.3)
  • bluster(v.1.4.0)
  • curl(v.4.3.2)
  • leiden(v.0.3.9)
  • jquerylib(v.0.1.4)
  • snakecase(v.0.11.0)
  • Matrix(v.1.4-0)
  • RcppAnnoy(v.0.0.19)
  • RColorBrewer(v.1.1-2)
  • iterators(v.1.0.14)
  • stringr(v.1.4.0)
  • R.cache(v.0.15.0)
  • htmlwidgets(v.1.5.4)
  • beachmat(v.2.10.0)
  • polyclip(v.1.10-0)
  • purrr(v.0.3.4)
  • gridGraphics(v.0.5-1)
  • rvest(v.1.0.2)
  • mgcv(v.1.8-38)
  • globals(v.0.14.0)
  • spatstat.random(v.2.1-0)
  • codetools(v.0.2-18)
  • lubridate(v.1.8.0)
  • FNN(v.1.1.3)
  • metapod(v.1.2.0)
  • prettyunits(v.1.1.1)
  • MCMCprecision(v.0.4.0)
  • R.methodsS3(v.1.8.1)
  • RSpectra(v.0.16-0)
  • gtable(v.0.3.0)
  • DBI(v.1.1.2)
  • highr(v.0.9)
  • tensor(v.1.5)
  • httr(v.1.4.2)
  • KernSmooth(v.2.23-20)
  • stringi(v.1.7.6)
  • progress(v.1.2.2)
  • farver(v.2.1.0)
  • reshape2(v.1.4.4)
  • diptest(v.0.76-0)
  • viridis(v.0.6.2)
  • magick(v.2.7.3)
  • xml2(v.1.3.3)
  • combinat(v.0.0-8)
  • BiocNeighbors(v.1.12.0)
  • restfulr(v.0.0.13)
  • ade4(v.1.7-18)
  • scattermore(v.0.8)
  • BiocVersion(v.3.14.0)
  • bit(v.4.0.4)
  • spatstat.data(v.2.1-2)
  • pkgconfig(v.2.0.3)
  • knitr(v.1.37)

05 April 2022